# -*- coding:utf-8 -*-
import re

from spider.common.util.HtmlUtil import HtmlUtil
from spider.common.util.SQLUtil import SafeMySQLCRUD

# XPath expressions used to walk the listing table stored in each page row.
ROW_XPATH = '//tbody//tr'
CELL_XPATH = '//td'

# A listing row is read through cell index 4 (area, class, title/link, time,
# views), so anything shorter cannot be mapped to a record.
MIN_CELLS = 5


def clean_title(raw):
    """Return the link text with real and literal ``\\n`` sequences removed.

    Both an actual newline character and the two-character sequence
    backslash + ``n`` are dropped, then surrounding whitespace is stripped.
    ``None`` is treated as an empty title instead of raising.
    """
    if raw is None:
        return ''
    return raw.replace("\n", "").replace("\\n", "").strip()


def clean_url(raw):
    """Return the href with every backslash and double quote removed.

    ``None`` is treated as an empty URL instead of raising.
    """
    if raw is None:
        return ''
    return re.sub(r'[\\"]', '', raw)


def parse_page(page, html_util):
    """Build one record dict per usable table row of a stored page.

    Args:
        page: mapping read from the ``page`` table; only ``'id'`` and
            ``'html'`` are accessed here.
        html_util: object providing ``find_elements``, ``to_str``,
            ``extarct_view_text`` and ``extarct_view_href`` (an ``HtmlUtil``).

    Returns:
        A list of dicts with the keys ``id``, ``pageId``, ``area``, ``class``,
        ``title``, ``url``, ``time`` and ``views``.
    """
    # The first matched row is skipped -- presumably the table header;
    # NOTE(review): confirm against the stored markup.
    rows = html_util.find_elements(html=page['html'], xpath_exp=ROW_XPATH)[1:]

    records = []
    for row in rows:
        # Each row is re-serialised so the absolute '//td' expression only
        # sees the cells of this row.
        cells = html_util.find_elements(html=html_util.to_str(row),
                                        xpath_exp=CELL_XPATH)
        if len(cells) < MIN_CELLS:
            # Short rows (spacer / colspan rows) previously raised IndexError
            # and aborted the whole run before anything was inserted.
            continue

        link_cell = cells[2]
        records.append({
            'id': 0,
            'pageId': page['id'],
            'area': cells[0].text,
            'class': cells[1].text,
            'title': clean_title(html_util.extarct_view_text(link_cell)),
            'url': clean_url(html_util.extarct_view_href(link_cell)),
            'time': cells[3].text,
            'views': cells[4].text,
        })
    return records


def main():
    """Parse the stored pages with code 131000 and insert the rows into ``list``."""
    sql_util = SafeMySQLCRUD()
    pages = sql_util.retrieve_jsons(table='page', condition="code='131000'",
                                    show_sql=True)
    html_util = HtmlUtil()

    records = []
    # ``or []`` tolerates a falsy result when no page matched.
    for page in pages or []:
        records.extend(parse_page(page, html_util))

    # Nothing parsed means nothing to write; skip the insert call entirely.
    if records:
        sql_util.batch_insert(table='list', data_list=records)


if __name__ == "__main__":
    main()